Data prep

suppressWarnings(library(MendelianRandomization))
suppressWarnings(library(digest))
suppressWarnings(library(tidyr))                         
suppressWarnings(library(MRPRESSO))
suppressWarnings(library(rowr))
suppressWarnings(library(ggplot2))
suppressWarnings(library(dplyr))
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:rowr':
## 
##     coalesce, count
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
setwd('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants')

diabetes_b<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Diabetes/diabetes_b.rds')
diabetes_p<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Diabetes/diabetes_p.rds')
breast<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Breast Cancer/breast.rds')
prostate<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Prostate Cancer/prostate.rds')
breast= separate(data=breast, col=phase3_1kg_id, into=c('SNP','rest'), sep= "\\:")
## Warning: Expected 2 pieces. Additional pieces discarded in 111 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
groups<-read.csv('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/groups.csv', stringsAsFactors = TRUE, header=TRUE)
rs_b<-breast$SNP
index<-match(rs_b,groups$RS_ID)
groups_b<-groups[index,]

rs_p<-prostate$SNP
index<-match(rs_p,groups$RS_ID)
groups_p<-groups[index,]


MRInputObject_breast <- mr_input(bx = diabetes_b$Effect,
                          bxse = diabetes_b$StdErr,
                          by = breast$bcac_onco_icogs_gwas_beta,
                          byse = breast$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=rs_b)

MRAllObject_all_breast <- mr_allmethods(MRInputObject_breast, method = "all")

Breast cancer MR effect estimates:

MRAllObject_all_breast
##                     Method Estimate Std Error 95% CI        P-value
##              Simple median   -0.004     0.019  -0.041 0.033   0.837
##            Weighted median    0.011     0.018  -0.024 0.045   0.538
##  Penalized weighted median   -0.004     0.018  -0.039 0.031   0.825
##                                                                    
##                        IVW    0.000     0.017  -0.033 0.033   0.998
##              Penalized IVW   -0.007     0.013  -0.033 0.019   0.581
##                 Robust IVW    0.005     0.023  -0.040 0.050   0.819
##       Penalized robust IVW   -0.008     0.016  -0.040 0.024   0.612
##                                                                    
##                   MR-Egger    0.029     0.035  -0.040 0.098   0.407
##                (intercept)   -0.003     0.003  -0.009 0.003   0.344
##         Penalized MR-Egger    0.003     0.031  -0.058 0.064   0.926
##                (intercept)   -0.001     0.002  -0.006 0.004   0.743
##            Robust MR-Egger    0.046     0.066  -0.083 0.176   0.484
##                (intercept)   -0.004     0.005  -0.013 0.006   0.447
##  Penalized robust MR-Egger   -0.005     0.041  -0.085 0.076   0.908
##                (intercept)    0.000     0.003  -0.006 0.006   0.939
mr_plot(MRInputObject_breast,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast)

MR-PRESSO

#MR-PRESSO breast

input = data.frame(betaY=breast$bcac_onco_icogs_gwas_beta, seY=breast$bcac_onco_icogs_gwas_se,beta_diabetes=diabetes_b$Effect, se_diabetes=diabetes_b$StdErr,row.names= NULL)

mrpresso_out = mr_presso(BetaOutcome = "betaY", BetaExposure = c("beta_diabetes"), SdOutcome = "seY", SdExposure="se_diabetes", data=input, OUTLIERtest = TRUE, DISTORTIONtest = TRUE, NbDistribution = 10000)

mrpresso_out$`Main MR results`
##        Exposure       MR Analysis Causal Estimate         Sd       T-stat
## 1 beta_diabetes               Raw    4.022679e-05 0.01704120  0.002360562
## 2 beta_diabetes Outlier-corrected   -7.200820e-03 0.01400442 -0.514181786
##     P-value
## 1 0.9981208
## 2 0.6082263

MR-PRESSO Distortion test

mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Distortion Coefficient`
## beta_diabetes 
##      100.5586
mrpresso_out$`MR-PRESSO results`$`Distortion Test`$Pvalue
## [1] 0.222

MR-PRESSO Outliers detected

out_indices = mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`
outliers = rs_b[out_indices]

data_b<-cbind(diabetes_b, breast)
data_b <- data_b[, !duplicated(colnames(data_b))]

# creating a scatterplot
ggplot(data = data_b, aes(x = diabetes_b$Effect, y = breast$bcac_onco_icogs_gwas_beta, color =ifelse(data_b$SNP%in%outliers,yes='red',no='blue'))) + 
  geom_point(size=2) +
  labs(title = "MR analysis\n", x = "T2D", y = "Breast cancer", color = "Genetic variants\n") +
  scale_color_manual(labels = c("Rest of SNPs", "outliers"), values = c("blue", "red")) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 16),
        axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16),
        plot.title = element_text(size = 20, face = "bold", color = "darkgreen"))

Scatter plot of grouped SNPs

# creating a scatterplot
ggplot(data = data_b, aes(x = diabetes_b$Effect, y = breast$bcac_onco_icogs_gwas_beta)) +
 geom_point(aes(color=factor(groups_b$TRAIT)),size=2) +
  labs(title = "MR analysis\n", x = "T2D", y = "Breast cancer", color = "Genetic variants\n")

# Stratified MR analyses

Insulin

data_b<-cbind(data_b, groups_b)
data_b_insulin<-data_b[data_b$TRAIT=='INSULIN RESISTANCE',]

MRInputObject_breast_insulin <- mr_input(bx = data_b_insulin$Effect,
                          bxse = data_b_insulin$StdErr,
                          by = data_b_insulin$bcac_onco_icogs_gwas_beta,
                          byse = data_b_insulin$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=data_b_insulin$SNP)

MRAllObject_all_breast_insulin <- mr_allmethods(MRInputObject_breast_insulin, method = "all")
MRAllObject_all_breast_insulin
##                     Method Estimate Std Error 95% CI         P-value
##              Simple median   -0.051     0.030  -0.110  0.008   0.091
##            Weighted median    0.075     0.027   0.021  0.128   0.006
##  Penalized weighted median    0.104     0.022   0.061  0.147   0.000
##                                                                     
##                        IVW    0.013     0.026  -0.038  0.065   0.609
##              Penalized IVW   -0.065     0.024  -0.112 -0.018   0.007
##                 Robust IVW   -0.013     0.084  -0.178  0.151   0.874
##       Penalized robust IVW   -0.069     0.024  -0.117 -0.021   0.005
##                                                                     
##                   MR-Egger    0.129     0.050   0.031  0.227   0.010
##                (intercept)   -0.015     0.006  -0.027 -0.004   0.009
##         Penalized MR-Egger    0.142     0.048   0.049  0.236   0.003
##                (intercept)   -0.016     0.005  -0.027 -0.005   0.004
##            Robust MR-Egger    0.133     0.057   0.021  0.245   0.020
##                (intercept)   -0.016     0.006  -0.027 -0.004   0.006
##  Penalized robust MR-Egger    0.145     0.045   0.057  0.233   0.001
##                (intercept)   -0.016     0.005  -0.026 -0.007   0.001
mr_plot(MRInputObject_breast_insulin,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast_insulin)

Glucose

data_b<-cbind(data_b, groups_b)
data_b_glucose<-data_b[data_b$TRAIT=='GLUCOSE',]

MRInputObject_breast_glucose <- mr_input(bx = data_b_glucose$Effect,
                          bxse = data_b_glucose$StdErr,
                          by = data_b_glucose$bcac_onco_icogs_gwas_beta,
                          byse = data_b_glucose$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=data_b_glucose$SNP)

MRAllObject_all_breast_glucose <- mr_allmethods(MRInputObject_breast_glucose, method = "all")
MRAllObject_all_breast_glucose
##                     Method Estimate Std Error 95% CI        P-value
##              Simple median    0.014     0.039  -0.063 0.090   0.726
##            Weighted median   -0.031     0.038  -0.106 0.043   0.411
##  Penalized weighted median   -0.024     0.038  -0.099 0.051   0.529
##                                                                    
##                        IVW   -0.021     0.042  -0.102 0.061   0.619
##              Penalized IVW    0.017     0.034  -0.049 0.083   0.618
##                 Robust IVW   -0.013     0.039  -0.089 0.064   0.748
##       Penalized robust IVW    0.011     0.031  -0.050 0.072   0.724
##                                                                    
##                   MR-Egger   -0.083     0.133  -0.344 0.177   0.530
##                (intercept)    0.006     0.011  -0.016 0.028   0.618
##         Penalized MR-Egger   -0.090     0.090  -0.266 0.086   0.318
##                (intercept)    0.008     0.007  -0.007 0.022   0.283
##            Robust MR-Egger   -0.084     0.112  -0.303 0.135   0.450
##                (intercept)    0.006     0.009  -0.011 0.024   0.483
##  Penalized robust MR-Egger   -0.091     0.079  -0.246 0.065   0.253
##                (intercept)    0.008     0.007  -0.006 0.022   0.256
mr_plot(MRInputObject_breast_glucose,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast_glucose)

Lipids

data_b<-cbind(data_b, groups_b)
data_b_lipid<-data_b[data_b$TRAIT=='LIPID',]

MRInputObject_breast_lipid <- mr_input(bx = data_b_lipid$Effect,
                          bxse = data_b_lipid$StdErr,
                          by = data_b_lipid$bcac_onco_icogs_gwas_beta,
                          byse = data_b_lipid$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=data_b_lipid$SNP)

MRAllObject_all_breast_lipid <- mr_allmethods(MRInputObject_breast_lipid, method = "all")
MRAllObject_all_breast_lipid
##                     Method Estimate Std Error 95% CI        P-value
##              Simple median   -0.052     0.092  -0.232 0.128   0.574
##            Weighted median   -0.022     0.063  -0.145 0.101   0.720
##  Penalized weighted median   -0.021     0.063  -0.145 0.102   0.736
##                                                                    
##                        IVW   -0.042     0.097  -0.233 0.148   0.664
##              Penalized IVW   -0.012     0.052  -0.114 0.091   0.825
##                 Robust IVW   -0.026     0.036  -0.097 0.045   0.476
##       Penalized robust IVW   -0.015     0.039  -0.091 0.061   0.693
##                                                                    
##                   MR-Egger    0.192     0.203  -0.205 0.590   0.343
##                (intercept)   -0.014     0.011  -0.035 0.007   0.195
##         Penalized MR-Egger    0.192     0.203  -0.205 0.590   0.343
##                (intercept)   -0.014     0.011  -0.035 0.007   0.195
##            Robust MR-Egger    0.173     0.191  -0.201 0.547   0.365
##                (intercept)   -0.013     0.013  -0.038 0.013   0.321
##  Penalized robust MR-Egger    0.173     0.191  -0.201 0.547   0.365
##                (intercept)   -0.013     0.013  -0.038 0.013   0.321
mr_plot(MRInputObject_breast_lipid,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast_lipid)

Prostate cancer MR effect estimates:

MRInputObject_prostate <- mr_input(bx = diabetes_p$Effect,
                          bxse = diabetes_p$StdErr,
                          by = prostate$Effect,
                          byse = prostate$StdErr, outcome = 'prostate cancer', exposure='diabetes',snps=rs_p)

MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate, method = "all")
MRAllObject_all_prostate 
##                     Method Estimate Std Error 95% CI        P-value
##              Simple median    0.011     0.024  -0.035 0.058   0.632
##            Weighted median    0.001     0.022  -0.042 0.045   0.947
##  Penalized weighted median   -0.005     0.022  -0.049 0.039   0.819
##                                                                    
##                        IVW   -0.041     0.042  -0.124 0.042   0.333
##              Penalized IVW   -0.010     0.016  -0.042 0.022   0.543
##                 Robust IVW   -0.007     0.018  -0.042 0.028   0.690
##       Penalized robust IVW   -0.012     0.016  -0.043 0.020   0.469
##                                                                    
##                   MR-Egger   -0.113     0.100  -0.309 0.084   0.260
##                (intercept)    0.006     0.007  -0.009 0.021   0.428
##         Penalized MR-Egger   -0.048     0.038  -0.122 0.027   0.213
##                (intercept)    0.003     0.003  -0.003 0.009   0.297
##            Robust MR-Egger   -0.020     0.039  -0.097 0.056   0.603
##                (intercept)    0.001     0.003  -0.004 0.006   0.690
##  Penalized robust MR-Egger   -0.045     0.032  -0.108 0.017   0.155
##                (intercept)    0.003     0.002  -0.002 0.007   0.274
mr_plot(MRInputObject_prostate,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)

MR-PRESSO prostate

#MR-PRESSO prostate

input = data.frame(betaY=prostate$Effect,seY=prostate$StdErr,beta_diabetes=diabetes_p$Effect,se_diabetes=diabetes_p$StdErr, row.names= NULL)

mrpresso_out = mr_presso(BetaOutcome = "betaY", BetaExposure = c("beta_diabetes"), SdOutcome = "seY", SdExposure="se_diabetes", data=input, OUTLIERtest = TRUE, DISTORTIONtest = TRUE, NbDistribution = 10000)

mrpresso_out$`Main MR results`
##        Exposure       MR Analysis Causal Estimate         Sd     T-stat
## 1 beta_diabetes               Raw     -0.04085910 0.04223478 -0.9674276
## 2 beta_diabetes Outlier-corrected     -0.01429709 0.01722820 -0.8298654
##     P-value
## 1 0.3360120
## 2 0.4089954

MR-PRESSO Distortion test

mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Distortion Coefficient`
## beta_diabetes 
##     -185.7861
mrpresso_out$`MR-PRESSO results`$`Distortion Test`$Pvalue
## [1] 0.034

MR-PRESSO Outliers detected

out_indices = mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`
outliers=rs_p[out_indices]
colnames(prostate)[13]<-'Effect_p'
colnames(prostate)[14]<-'StdErr_p'

data_p<-cbind(diabetes_p, prostate)
data_p <- data_p[, !duplicated(colnames(data_p))]

ggplot(data = data_p, aes(x = diabetes_p$Effect, y = prostate$Effect, color =ifelse(data_p$SNP%in%outliers,yes='red',no='blue'))) + 
  geom_point(size=2) +
  labs(title = "MR analysis\n", x = "T2D", y = "Prostate cancer", color = "Genetic variants\n") +
  scale_color_manual(labels = c("Rest of SNPs", "outliers"), values = c("blue", "red")) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 16),
        axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16),
        plot.title = element_text(size = 20, face = "bold", color = "darkgreen"))

# #extract rs numbers below 5x10^-8
# 
# setwd('C:/Users/marta/Desktop/IC/Thesis/Genetic variants/Pathways')
# 
# # where is the summary data stored?
# name_in = "/Users/marta/Desktop/IC/Thesis/Genetic variants/Pathways/"
# # what are the files in the folder?
# os_list_dir=list.files(name_in)
# os_list_dir=sort(os_list_dir)
# 
# os_list_dir
# 
# 
# # where to save the summary data for the instruments?
# name_out = "/Users/marta/Desktop/IC/Thesis/New"
# 
# if(!file.exists(name_out)){dir.create(file.path(name_out))}
# 
# 
# for (i in 1:length(os_list_dir)){
#   print(paste("reading: ", os_list_dir[i]))
#   data<-read.csv(os_list_dir[i], stringsAsFactors = FALSE, header = TRUE)
#   output1 = cbind.fill(output1,data[,10], fill=NA)
# }
# col
# #colnames(output1)<-sub(".csv","",col)
# 
# filename = sprintf("pathways.csv", name_out, rf)
# 
# write.csv(file=filename, output)

Scatter plot of grouped SNPs

ggplot(data = data_p, aes(x = diabetes_p$Effect, y = prostate$Effect)) +
 geom_point(aes(color=factor(groups_p$TRAIT)),size=2) +
  labs(title = "MR analysis\n", x = "T2D", y = "Prostate cancer", color = "Genetic variants\n")

MR stratified analysis

Insulin

data_p<-cbind(data_p, groups_p)
data_p_insulin<-data_p[data_p$TRAIT=='INSULIN RESISTANCE',]

MRInputObject_prostate_insulin <- mr_input(bx = data_p_insulin$Effect,
                          bxse = data_p_insulin$StdErr,
                          by = data_p_insulin$Effect_p,
                          byse = data_p_insulin$StdErr_p, outcome = 'prostate cancer', exposure='diabetes',snps=data_p_insulin$SNP)

MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate_insulin, method = "all")
MRAllObject_all_prostate 
##                     Method Estimate Std Error 95% CI         P-value
##              Simple median    0.036     0.039  -0.039  0.112   0.346
##            Weighted median   -0.004     0.036  -0.074  0.066   0.918
##  Penalized weighted median   -0.018     0.036  -0.088  0.052   0.615
##                                                                     
##                        IVW    0.024     0.038  -0.049  0.098   0.517
##              Penalized IVW    0.005     0.033  -0.059  0.070   0.868
##                 Robust IVW    0.017     0.036  -0.054  0.087   0.645
##       Penalized robust IVW    0.003     0.028  -0.053  0.059   0.915
##                                                                     
##                   MR-Egger   -0.093     0.095  -0.280  0.094   0.331
##                (intercept)    0.012     0.009  -0.006  0.030   0.183
##         Penalized MR-Egger   -0.139     0.077  -0.290  0.012   0.071
##                (intercept)    0.015     0.007   0.000  0.029   0.043
##            Robust MR-Egger   -0.123     0.075  -0.270  0.023   0.099
##                (intercept)    0.014     0.008  -0.002  0.031   0.083
##  Penalized robust MR-Egger   -0.146     0.065  -0.274 -0.019   0.025
##                (intercept)    0.016     0.008   0.000  0.032   0.050
mr_plot(MRInputObject_prostate_insulin,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)

Glucose

data_p<-cbind(data_p, groups_p)
data_p_glucose<-data_p[data_p$TRAIT=='GLUCOSE',]

MRInputObject_prostate_glucose <- mr_input(bx = data_p_glucose$Effect,
                          bxse = data_p_glucose$StdErr,
                          by = data_p_glucose$Effect_p,
                          byse = data_p_glucose$StdErr_p, outcome = 'prostate cancer', exposure='diabetes',snps=data_p_glucose$SNP)

MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate_glucose, method = "all")
MRAllObject_all_prostate 
##                     Method Estimate Std Error 95% CI        P-value
##              Simple median    0.030     0.061  -0.089 0.149   0.622
##            Weighted median   -0.057     0.052  -0.159 0.045   0.274
##  Penalized weighted median   -0.057     0.052  -0.159 0.045   0.274
##                                                                    
##                        IVW   -0.017     0.038  -0.092 0.059   0.662
##              Penalized IVW   -0.017     0.038  -0.092 0.059   0.662
##                 Robust IVW   -0.018     0.040  -0.097 0.061   0.662
##       Penalized robust IVW   -0.018     0.040  -0.097 0.061   0.662
##                                                                    
##                   MR-Egger   -0.137     0.125  -0.383 0.108   0.273
##                (intercept)    0.010     0.010  -0.009 0.030   0.312
##         Penalized MR-Egger   -0.137     0.125  -0.383 0.108   0.273
##                (intercept)    0.010     0.010  -0.009 0.030   0.312
##            Robust MR-Egger   -0.165     0.723  -1.583 1.252   0.819
##                (intercept)    0.010     0.022  -0.033 0.054   0.636
##  Penalized robust MR-Egger   -0.165     0.723  -1.583 1.252   0.819
##                (intercept)    0.010     0.022  -0.033 0.054   0.636
mr_plot(MRInputObject_prostate_glucose,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)

Lipids

data_p<-cbind(data_p, groups_p)
data_p_lipid<-data_p[data_p$TRAIT=='LIPID',]

MRInputObject_prostate_lipid <- mr_input(bx = data_p_lipid$Effect,
                          bxse = data_p_lipid$StdErr,
                          by = data_p_lipid$Effect_p,
                          byse = data_p_lipid$StdErr_p, outcome = 'prostate cancer', exposure='diabetes',snps=data_p_lipid$SNP)

MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate_lipid, method = "all")
MRAllObject_all_prostate 
##                     Method Estimate Std Error 95% CI         P-value
##              Simple median   -0.168     0.209  -0.576  0.241   0.421
##            Weighted median   -0.227     0.135  -0.491  0.037   0.092
##  Penalized weighted median   -0.227     0.135  -0.491  0.037   0.092
##                                                                     
##                        IVW   -0.207     0.105  -0.413 -0.001   0.049
##              Penalized IVW   -0.207     0.105  -0.413 -0.001   0.049
##                 Robust IVW   -0.207     0.072  -0.349 -0.066   0.004
##       Penalized robust IVW   -0.207     0.072  -0.349 -0.066   0.004
##                                                                     
##                   MR-Egger   -0.287     0.221  -0.720  0.146   0.194
##                (intercept)    0.004     0.009  -0.014  0.022   0.680
##         Penalized MR-Egger   -0.287     0.221  -0.720  0.146   0.194
##                (intercept)    0.004     0.009  -0.014  0.022   0.680
##            Robust MR-Egger   -0.288     0.067  -0.418 -0.157   0.000
##                (intercept)    0.004     0.004  -0.005  0.012   0.375
##  Penalized robust MR-Egger   -0.288     0.067  -0.418 -0.157   0.000
##                (intercept)    0.004     0.004  -0.005  0.012   0.375
mr_plot(MRInputObject_prostate_lipid,
        error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)